In [2]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import seaborn as sns
import tqdm
%matplotlib inline
sns.set(font_scale=1.5)
In [14]:
def metropolis(func, steps=10000):
"""A very simple Metropolis implementation"""
samples = np.zeros(steps)
# start at mean for heck of it
old_x = func.mean()
# get the prob density at this point
old_prob = func.pdf(old_x)
for i in range(steps):
# take a small step some direction
new_x = old_x + np.random.normal(0, 0.5)
# get the prob density
new_prob = func.pdf(new_x)
# compute acceptace parameter
acceptance = new_prob/old_prob
# if accepted (bigger than some rangom number)
# the more better the new point it the higher chance it is accepted
if acceptance >= np.random.random():
# keep the new point and make new the old point
samples[i] = new_x
old_x = new_x
old_prob = new_prob
else:
# if rejected keep the old point
samples[i] = old_x
return samples
In [21]:
np.random.seed(2)
func = stats.beta(0.4, 2)
samples = metropolis(func=func)
x = np.linspace(0.01, .99, 100)
y = func.pdf(x)
plt.xlim(0, 1)
plt.plot(x, y, 'r-', lw=3, label='True distribution')
plt.hist(samples, bins=32, density=True, label='Estimated distribution')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$pdf(x)$', fontsize=14)
plt.legend(fontsize=14)
Out[21]:
In [22]:
np.random.seed(2)
func = stats.norm(10, 2)
samples = metropolis(func=func)
x = np.linspace(0.01, 25, 1000)
y = func.pdf(x)
# plt.xlim(0, 1)
plt.plot(x, y, 'r-', lw=3, label='True distribution')
plt.hist(samples, bins=32, density=True, label='Estimated distribution')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$pdf(x)$', fontsize=14)
plt.legend(fontsize=14)
Out[22]:
In [30]:
def metropolis_extra(func, steps=10000):
"""A very simple Metropolis implementation"""
samples = np.zeros(steps)
accept = np.zeros(steps)
accept2 = np.zeros(steps)
# start at mean for heck of it
old_x = func.mean()
# get the prob density at this point
old_prob = func.pdf(old_x)
for i in range(steps):
# take a small step some direction
new_x = old_x + np.random.normal(0, 0.5)
# get the prob density
new_prob = func.pdf(new_x)
# compute acceptace parameter
acceptance = new_prob/old_prob
accept[i] = acceptance
# if accepted (bigger than some rangom number)
# the more better the new point it the higher chance it is accepted
rnd = np.random.random()
accept2[i] = rnd
if acceptance >= rnd:
# keep the new point and make new the old point
samples[i] = new_x
old_x = new_x
old_prob = new_prob
else:
# if rejected keep the old point
samples[i] = old_x
return samples, accept, accept2
In [45]:
np.random.seed(2)
func = stats.norm(10, 2)
samples, accept, accept2 = metropolis_extra(func=func)
x = np.linspace(0.01, 25, 10)
y = func.pdf(x)
# plt.xlim(0, 1)
plt.plot(x, y, 'r-', lw=3, label='True distribution')
plt.hist(samples, bins=32, density=True, label='Estimated distribution')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$pdf(x)$', fontsize=14)
plt.legend(fontsize=14)
plt.figure()
plt.plot(accept, accept2, '.')
plt.figure()
plt.hist(accept, bins=20, density=True)
plt.figure()
plt.hist((accept>accept2).astype(int), bins=[-1,0, .5,1, 1.5], density=True)
Out[45]:
In [44]:
(accept>accept2)
Out[44]:
In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
sns.set_style('white')
sns.set_context('talk')
np.random.seed(123)
In [2]:
data = np.random.randn(20)
In [4]:
ax = plt.subplot()
sns.distplot(data, kde=False, ax=ax)
_ = ax.set(title='Histogram of observed data', xlabel='x', ylabel='# observations');
In [5]:
def calc_posterior_analytical(data, x, mu_0, sigma_0):
sigma = 1.
n = len(data)
mu_post = (mu_0 / sigma_0**2 + data.sum() / sigma**2) / (1. / sigma_0**2 + n / sigma**2)
sigma_post = (1. / sigma_0**2 + n / sigma**2)**-1
return norm(mu_post, np.sqrt(sigma_post)).pdf(x)
ax = plt.subplot()
x = np.linspace(-1, 1, 500)
posterior_analytical = calc_posterior_analytical(data, x, 0., 1.)
ax.plot(x, posterior_analytical)
ax.set(xlabel='mu', ylabel='belief', title='Analytical posterior');
sns.despine()
In [8]:
def sampler(data, samples=4, mu_init=.5, proposal_width=.5, plot=False, mu_prior_mu=0, mu_prior_sd=1.):
mu_current = mu_init
posterior = [mu_current]
for i in range(samples):
# suggest new position
mu_proposal = norm(mu_current, proposal_width).rvs()
# Compute likelihood by multiplying probabilities of each data point
likelihood_current = norm(mu_current, 1).pdf(data).prod()
likelihood_proposal = norm(mu_proposal, 1).pdf(data).prod()
# Compute prior probability of current and proposed mu
prior_current = norm(mu_prior_mu, mu_prior_sd).pdf(mu_current)
prior_proposal = norm(mu_prior_mu, mu_prior_sd).pdf(mu_proposal)
p_current = likelihood_current * prior_current
p_proposal = likelihood_proposal * prior_proposal
# Accept proposal?
p_accept = p_proposal / p_current
# Usually would include prior probability, which we neglect here for simplicity
accept = np.random.rand() < p_accept
if plot:
plot_proposal(mu_current, mu_proposal, mu_prior_mu, mu_prior_sd, data, accept, posterior, i)
if accept:
# Update position
mu_current = mu_proposal
posterior.append(mu_current)
return np.array(posterior)
# Function to display
def plot_proposal(mu_current, mu_proposal, mu_prior_mu, mu_prior_sd, data, accepted, trace, i):
from copy import copy
trace = copy(trace)
fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, figsize=(16, 4))
fig.suptitle('Iteration %i' % (i + 1))
x = np.linspace(-3, 3, 5000)
color = 'g' if accepted else 'r'
# Plot prior
prior_current = norm(mu_prior_mu, mu_prior_sd).pdf(mu_current)
prior_proposal = norm(mu_prior_mu, mu_prior_sd).pdf(mu_proposal)
prior = norm(mu_prior_mu, mu_prior_sd).pdf(x)
ax1.plot(x, prior)
ax1.plot([mu_current] * 2, [0, prior_current], marker='o', color='b')
ax1.plot([mu_proposal] * 2, [0, prior_proposal], marker='o', color=color)
ax1.annotate("", xy=(mu_proposal, 0.2), xytext=(mu_current, 0.2),
arrowprops=dict(arrowstyle="->", lw=2.))
ax1.set(ylabel='Probability Density', title='current: prior(mu=%.2f) = %.2f\nproposal: prior(mu=%.2f) = %.2f' % (mu_current, prior_current, mu_proposal, prior_proposal))
# Likelihood
likelihood_current = norm(mu_current, 1).pdf(data).prod()
likelihood_proposal = norm(mu_proposal, 1).pdf(data).prod()
y = norm(loc=mu_proposal, scale=1).pdf(x)
sns.distplot(data, kde=False, norm_hist=True, ax=ax2)
ax2.plot(x, y, color=color)
ax2.axvline(mu_current, color='b', linestyle='--', label='mu_current')
ax2.axvline(mu_proposal, color=color, linestyle='--', label='mu_proposal')
#ax2.title('Proposal {}'.format('accepted' if accepted else 'rejected'))
ax2.annotate("", xy=(mu_proposal, 0.2), xytext=(mu_current, 0.2),
arrowprops=dict(arrowstyle="->", lw=2.))
ax2.set(title='likelihood(mu=%.2f) = %.2f\nlikelihood(mu=%.2f) = %.2f' % (mu_current, 1e14*likelihood_current, mu_proposal, 1e14*likelihood_proposal))
# Posterior
posterior_analytical = calc_posterior_analytical(data, x, mu_prior_mu, mu_prior_sd)
ax3.plot(x, posterior_analytical)
posterior_current = calc_posterior_analytical(data, mu_current, mu_prior_mu, mu_prior_sd)
posterior_proposal = calc_posterior_analytical(data, mu_proposal, mu_prior_mu, mu_prior_sd)
ax3.plot([mu_current] * 2, [0, posterior_current], marker='o', color='b')
ax3.plot([mu_proposal] * 2, [0, posterior_proposal], marker='o', color=color)
ax3.annotate("", xy=(mu_proposal, 0.2), xytext=(mu_current, 0.2),
arrowprops=dict(arrowstyle="->", lw=2.))
#x3.set(title=r'prior x likelihood $\propto$ posterior')
ax3.set(title='posterior(mu=%.2f) = %.5f\nposterior(mu=%.2f) = %.5f' % (mu_current, posterior_current, mu_proposal, posterior_proposal))
if accepted:
trace.append(mu_proposal)
else:
trace.append(mu_current)
ax4.plot(trace)
ax4.set(xlabel='iteration', ylabel='mu', title='trace')
plt.tight_layout()
#plt.legend()
In [9]:
np.random.seed(123)
sampler(data, samples=8, mu_init=-1., plot=True);
In [10]:
posterior = sampler(data, samples=15000, mu_init=1.)
fig, ax = plt.subplots()
ax.plot(posterior)
_ = ax.set(xlabel='sample', ylabel='mu');
In [11]:
ax = plt.subplot()
sns.distplot(posterior[500:], ax=ax, label='estimated posterior')
x = np.linspace(-.5, .5, 500)
post = calc_posterior_analytical(data, x, 0, 1)
ax.plot(x, post, 'g', label='analytic posterior')
_ = ax.set(xlabel='mu', ylabel='belief');
ax.legend();
In [12]:
posterior_small = sampler(data, samples=5000, mu_init=1., proposal_width=.01)
fig, ax = plt.subplots()
ax.plot(posterior_small);
_ = ax.set(xlabel='sample', ylabel='mu');
In [13]:
posterior_large = sampler(data, samples=5000, mu_init=1., proposal_width=3.)
fig, ax = plt.subplots()
ax.plot(posterior_large); plt.xlabel('sample'); plt.ylabel('mu');
_ = ax.set(xlabel='sample', ylabel='mu');
In [14]:
sns.distplot(posterior_small[1000:], label='Small step size')
sns.distplot(posterior_large[1000:], label='Large step size');
_ = plt.legend();
In [15]:
from pymc3.stats import autocorr
lags = np.arange(1, 100)
fig, ax = plt.subplots()
ax.plot(lags, [autocorr(posterior_large, l) for l in lags], label='large step size')
ax.plot(lags, [autocorr(posterior_small, l) for l in lags], label='small step size')
ax.plot(lags, [autocorr(posterior, l) for l in lags], label='medium step size')
ax.legend(loc=0)
_ = ax.set(xlabel='lag', ylabel='autocorrelation', ylim=(-.1, 1))
In [19]:
import pymc3 as pm
with pm.Model():
mu = pm.Normal('mu', 0, 1)
sigma = 1.
returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)
step = pm.Metropolis()
trace = pm.sample(15000, step)
In [20]:
plt.subplots(figsize=(10,6))
sns.distplot(trace[2000:]['mu'], label='PyMC3 sampler');
sns.distplot(posterior[500:], label='Hand-written sampler');
plt.legend();
In [21]:
with pm.Model():
mu = pm.Normal('mu', 0, 1)
sigma = 1.
returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)
step = pm.NUTS()
trace2 = pm.sample(15000, step)
In [22]:
plt.subplots(figsize=(10,6))
sns.distplot(trace[2000:]['mu'], label='PyMC3 sampler');
sns.distplot(trace2[2000:]['mu'], label='PyMC3 NUTS');
sns.distplot(posterior[500:], label='Hand-written sampler');
plt.legend();
In [26]:
with pm.Model():
mu = pm.Normal('mu', 0, 1)
sigma = 1.
returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)
step = pm.NUTS()
trace2 = pm.sample(1000, step)
plt.subplots(figsize=(10,6))
sns.distplot(trace[2000:]['mu'], label='PyMC3 sampler');
sns.distplot(trace2['mu'], label='PyMC3 NUTS');
sns.distplot(posterior[500:], label='Hand-written sampler');
plt.legend();
In [33]:
pm.plot_autocorr(trace2)
plt.suptitle('NUTS')
Out[33]:
In [34]:
pm.plot_autocorr(trace)
plt.suptitle('Metropolis')
Out[34]:
In [35]:
pm.plot_autocorr(posterior)
plt.suptitle('Simple')
Out[35]:
In [30]:
plt.subplots(figsize=(10,6))
sns.distplot(trace[2000::15]['mu'], label='PyMC3 sampler');
sns.distplot(trace2['mu'][::5], label='PyMC3 NUTS');
sns.distplot(posterior[500::10], label='Hand-written sampler');
plt.legend()
Out[30]:
In [ ]: